import pandas as pd  
from sklearn.preprocessing import StandardScaler, LabelEncoder  
from sklearn.cluster import KMeans  
import pickle
  
# Load the dataset
credit_customers = pd.read_csv("credit_customers.csv")

# Select the features used for segmentation. Work on a copy so the encoding
# steps below never modify the columns of the original frame.
important_columns = ['credit_history', 'age', 'employment', 'credit_amount', 'savings_status']
data_for_clustering = credit_customers[important_columns].copy()

# Preprocess the data: replace the categorical labels with integer codes.
# A fresh encoder is fitted per column, so the codes of one column are
# independent of the other.
for column in ('savings_status', 'employment'):
    data_for_clustering[column] = LabelEncoder().fit_transform(data_for_clustering[column])

# One-hot encode credit_history; drop_first removes the first level so the
# indicator columns are not perfectly collinear.
data_for_clustering = pd.get_dummies(data_for_clustering, columns=['credit_history'], drop_first=True)

# Normalize the data so that features with large ranges do not dominate the
# distance computation used by K-means.
data_for_clustering_scaled = StandardScaler().fit_transform(data_for_clustering)

# Perform K-means clustering. The parameters live in a single dict so the
# values reported in `result` are exactly the ones the model was built with.
kmeans_params = {"n_clusters": 4, "random_state": 42}
kmeans = KMeans(**kmeans_params)
cluster_labels = kmeans.fit_predict(data_for_clustering_scaled)

# Add the cluster labels to the original dataset
credit_customers['cluster'] = cluster_labels

# Bundle the clustering algorithm used, the parameters chosen, and the
# resulting customer segments (original, un-encoded feature values plus the
# assigned cluster).
result = ("K-means", kmeans_params, credit_customers[important_columns + ['cluster']])

print("result:\n", result)

# Use a context manager so the file handle is flushed and closed
# deterministically instead of being left to the garbage collector.
with open("./ref_result/result.pkl", "wb") as output_file:
    pickle.dump(result, output_file)
